################################################################################
## Paper:     Working the Crowd
## Authors:   P. Mongrain, N. Fréchet, B. Thompson Collart, and Y. Dufresne
## Date:      February 2025
################################################################################

################################################################################
## LOAD PACKAGES
################################################################################

library(data.table)
library(survey)
library(tidyverse)
library(anesrake)
library(haven)
library(foreign)
library(weights)
library(caret)
library(lme4)
library(ecospat)
library(survival)
library(sandwich)
library(lmtest)
library(miceadds)
library(boot)
library(loo)
library(rstanarm)
library(bayesplot)

################################################################################
## SET WORKING DIRECTORY
################################################################################

getwd()

setwd("C:/...") #add appropriate path here

################################################################################
## LOAD DATA
################################################################################

#Note: These files need to be downloaded from their original sources (see the "surveys.text" file)

nhs2011 <- read_dta("nhs2011_pumf.dta")

nhs2011 <- subset(nhs2011, nhs2011$agegrp >= 7) #restrict data to adults

pum2016 <- read_dta("Census_2016_Individual_PUMF.dta")

pum2016 <- subset(pum2016, pum2016$agegrp >= 7) #restrict data to adults

pum2021 <- read_dta("cen_ind_2021_pumf_v2.dta")

pum2021 <- subset(pum2021, pum2021$agegrp >= 7) #restrict data to adults

################################################################################
## RECODE VARIABLES: CANADA 2011
################################################################################

nhs2011.ca2011 <- nhs2011

nhs2011.ca2011$sex_ca2011_t <- nhs2011.ca2011$sex
nhs2011.ca2011$age_ca2011_t <- nhs2011.ca2011$agegrp
nhs2011.ca2011$education_ca2011_t <- nhs2011.ca2011$hdgree
nhs2011.ca2011$income_ca2011_t <- nhs2011.ca2011$hhinc
nhs2011.ca2011$province_ca2011_t <- nhs2011.ca2011$pr

# Sex

nhs2011.ca2011$sex_ca2011_t[nhs2011.ca2011$sex_ca2011_t==1] <- 1 #female
nhs2011.ca2011$sex_ca2011_t[nhs2011.ca2011$sex_ca2011_t==2] <- 2 #male

# Age

nhs2011.ca2011$age_ca2011_t[nhs2011.ca2011$age_ca2011_t==7] <- 1 #18-19
nhs2011.ca2011$age_ca2011_t[nhs2011.ca2011$age_ca2011_t==8] <- 2 #20-24
nhs2011.ca2011$age_ca2011_t[nhs2011.ca2011$age_ca2011_t==9] <- 3 #25-29
nhs2011.ca2011$age_ca2011_t[nhs2011.ca2011$age_ca2011_t==10] <- 4 #30-34
nhs2011.ca2011$age_ca2011_t[nhs2011.ca2011$age_ca2011_t==11] <- 5 #35-39
nhs2011.ca2011$age_ca2011_t[nhs2011.ca2011$age_ca2011_t==12] <- 6 #40-44
nhs2011.ca2011$age_ca2011_t[nhs2011.ca2011$age_ca2011_t==13] <- 7 #45-49
nhs2011.ca2011$age_ca2011_t[nhs2011.ca2011$age_ca2011_t==14] <- 8 #50-54
nhs2011.ca2011$age_ca2011_t[nhs2011.ca2011$age_ca2011_t==15] <- 9 #55-59
nhs2011.ca2011$age_ca2011_t[nhs2011.ca2011$age_ca2011_t==16] <- 10 #60-64
nhs2011.ca2011$age_ca2011_t[nhs2011.ca2011$age_ca2011_t==17] <- 11 #65-69
nhs2011.ca2011$age_ca2011_t[nhs2011.ca2011$age_ca2011_t==18] <- 12 #70-74
nhs2011.ca2011$age_ca2011_t[nhs2011.ca2011$age_ca2011_t==19] <- 13 #75-79
nhs2011.ca2011$age_ca2011_t[nhs2011.ca2011$age_ca2011_t==20] <- 14 #80-84
nhs2011.ca2011$age_ca2011_t[nhs2011.ca2011$age_ca2011_t==21] <- 15 #85+
nhs2011.ca2011$age_ca2011_t[nhs2011.ca2011$age_ca2011_t==88] <- NA #not available

# Education

nhs2011.ca2011$education_ca2011_t[nhs2011.ca2011$education_ca2011_t==1] <- 1 #below high school
nhs2011.ca2011$education_ca2011_t[nhs2011.ca2011$education_ca2011_t==2] <- 2 #high school
nhs2011.ca2011$education_ca2011_t[nhs2011.ca2011$education_ca2011_t>=3 & nhs2011.ca2011$education_ca2011_t<=8] <- 3 #postsecondary below bachelor
nhs2011.ca2011$education_ca2011_t[nhs2011.ca2011$education_ca2011_t>=9 & nhs2011.ca2011$education_ca2011_t<=11] <- 4 #university undergraduate degree
nhs2011.ca2011$education_ca2011_t[nhs2011.ca2011$education_ca2011_t>=12 & nhs2011.ca2011$education_ca2011_t<=13] <- 5 #university graduate degree
nhs2011.ca2011$education_ca2011_t[nhs2011.ca2011$education_ca2011_t==88] <- NA #not available
nhs2011.ca2011$education_ca2011_t[nhs2011.ca2011$education_ca2011_t==99] <- NA #not applicable

# Household income before tax

nhs2011.ca2011$income_ca2011_t[nhs2011.ca2011$income_ca2011_t>=1 & nhs2011.ca2011$income_ca2011_t<=4] <- 1 #under $10,000
nhs2011.ca2011$income_ca2011_t[nhs2011.ca2011$income_ca2011_t>=5 & nhs2011.ca2011$income_ca2011_t<=6] <- 2 #$10,000-$14,999
nhs2011.ca2011$income_ca2011_t[nhs2011.ca2011$income_ca2011_t>=7 & nhs2011.ca2011$income_ca2011_t<=8] <- 3 #$15,000-$19,999
nhs2011.ca2011$income_ca2011_t[nhs2011.ca2011$income_ca2011_t==9] <- 4 #$20,000-$24,999
nhs2011.ca2011$income_ca2011_t[nhs2011.ca2011$income_ca2011_t==10] <- 5 #$25,000-$29,999
nhs2011.ca2011$income_ca2011_t[nhs2011.ca2011$income_ca2011_t==11] <- 6 #$30,000-$34,999
nhs2011.ca2011$income_ca2011_t[nhs2011.ca2011$income_ca2011_t==12] <- 7 #$35,000-$39,999
nhs2011.ca2011$income_ca2011_t[nhs2011.ca2011$income_ca2011_t==13] <- 8 #$40,000-$44,999
nhs2011.ca2011$income_ca2011_t[nhs2011.ca2011$income_ca2011_t>=14 & nhs2011.ca2011$income_ca2011_t<=15] <- 9 #$45,000-$54,999
nhs2011.ca2011$income_ca2011_t[nhs2011.ca2011$income_ca2011_t==16] <- 10 #$55,000-$59,999
nhs2011.ca2011$income_ca2011_t[nhs2011.ca2011$income_ca2011_t>=17 & nhs2011.ca2011$income_ca2011_t<=18] <- 11 #$60,000-$69,999
nhs2011.ca2011$income_ca2011_t[nhs2011.ca2011$income_ca2011_t>=19 & nhs2011.ca2011$income_ca2011_t<=20] <- 12 #$70,000-$79,999
nhs2011.ca2011$income_ca2011_t[nhs2011.ca2011$income_ca2011_t>=21 & nhs2011.ca2011$income_ca2011_t<=24] <- 13 #$80,000-$99,999
nhs2011.ca2011$income_ca2011_t[nhs2011.ca2011$income_ca2011_t>=25 & nhs2011.ca2011$income_ca2011_t<=29] <- 14 #$100,000-$149,999
nhs2011.ca2011$income_ca2011_t[nhs2011.ca2011$income_ca2011_t>=30 & nhs2011.ca2011$income_ca2011_t<=33] <- 15 #$150,000 or more
nhs2011.ca2011$income_ca2011_t[nhs2011.ca2011$income_ca2011_t==88] <- NA #not available

# Province

nhs2011.ca2011$province_ca2011_t[nhs2011.ca2011$province_ca2011_t==10] <- 1 #Newfoundland and Labrador
nhs2011.ca2011$province_ca2011_t[nhs2011.ca2011$province_ca2011_t==11] <- 2 #Prince Edward Island
nhs2011.ca2011$province_ca2011_t[nhs2011.ca2011$province_ca2011_t==12] <- 3 #Nova Scotia
nhs2011.ca2011$province_ca2011_t[nhs2011.ca2011$province_ca2011_t==13] <- 4 #New Brunswick
nhs2011.ca2011$province_ca2011_t[nhs2011.ca2011$province_ca2011_t==24] <- 5 #Quebec
nhs2011.ca2011$province_ca2011_t[nhs2011.ca2011$province_ca2011_t==35] <- 6 #Ontario
nhs2011.ca2011$province_ca2011_t[nhs2011.ca2011$province_ca2011_t==46] <- 7 #Manitoba
nhs2011.ca2011$province_ca2011_t[nhs2011.ca2011$province_ca2011_t==47] <- 8 #Saskatchewan
nhs2011.ca2011$province_ca2011_t[nhs2011.ca2011$province_ca2011_t==48] <- 9 #Alberta
nhs2011.ca2011$province_ca2011_t[nhs2011.ca2011$province_ca2011_t==59] <- 10 #British Columbia
nhs2011.ca2011$province_ca2011_t[nhs2011.ca2011$province_ca2011_t==60] <- 11 #Northern Canada

################################################################################
## RECODE VARIABLES: ONTARIO 2011
################################################################################

nhs2011.on2011 <- subset(nhs2011, nhs2011$pr == 35) #keep Ontario only

nhs2011.on2011$sex_on2011_t <- nhs2011.on2011$sex
nhs2011.on2011$age_on2011_t <- nhs2011.on2011$agegrp
nhs2011.on2011$education_on2011_t <- nhs2011.on2011$hdgree
nhs2011.on2011$income_on2011_t <- nhs2011.on2011$hhinc

# Sex

nhs2011.on2011$sex_on2011_t[nhs2011.on2011$sex_on2011_t==1] <- 1 #female
nhs2011.on2011$sex_on2011_t[nhs2011.on2011$sex_on2011_t==2] <- 2 #male

# Age

nhs2011.on2011$age_on2011_t[nhs2011.on2011$age_on2011_t>=7 & nhs2011.on2011$age_on2011_t<=8] <- 1 #18-24
nhs2011.on2011$age_on2011_t[nhs2011.on2011$age_on2011_t>=9 & nhs2011.on2011$age_on2011_t<=10] <- 2 #25-34
nhs2011.on2011$age_on2011_t[nhs2011.on2011$age_on2011_t>=11 & nhs2011.on2011$age_on2011_t<=12] <- 3 #35-44
nhs2011.on2011$age_on2011_t[nhs2011.on2011$age_on2011_t>=13 & nhs2011.on2011$age_on2011_t<=14] <- 4 #45-54
nhs2011.on2011$age_on2011_t[nhs2011.on2011$age_on2011_t>=15 & nhs2011.on2011$age_on2011_t<=16] <- 5 #55-64
nhs2011.on2011$age_on2011_t[nhs2011.on2011$age_on2011_t>=17] <- 6 #65+
nhs2011.on2011$age_on2011_t[nhs2011.ca2011$age_on2011_t==88] <- NA #not available

# Education

nhs2011.on2011$education_on2011_t[nhs2011.on2011$education_on2011_t==1] <- 1 #below high school
nhs2011.on2011$education_on2011_t[nhs2011.on2011$education_on2011_t==2] <- 2 #high school
nhs2011.on2011$education_on2011_t[nhs2011.on2011$education_on2011_t>=3 & nhs2011.on2011$education_on2011_t<=8] <- 3 #postsecondary below bachelor
nhs2011.on2011$education_on2011_t[nhs2011.on2011$education_on2011_t>=9 & nhs2011.on2011$education_on2011_t<=11] <- 4 #university undergraduate degree
nhs2011.on2011$education_on2011_t[nhs2011.on2011$education_on2011_t>=12 & nhs2011.on2011$education_on2011_t<=13] <- 5 #university graduate degree
nhs2011.on2011$education_on2011_t[nhs2011.on2011$education_on2011_t==88] <- NA #not available
nhs2011.on2011$education_on2011_t[nhs2011.on2011$education_on2011_t==99] <- NA #not applicable

# Household income before tax

nhs2011.on2011$income_on2011_t[nhs2011.on2011$income_on2011_t>=1 & nhs2011.on2011$income_on2011_t<=5] <- 1 #under $30,000
nhs2011.on2011$income_on2011_t[nhs2011.on2011$income_on2011_t>=6 & nhs2011.on2011$income_on2011_t<=16] <- 2 #$30,000-$59,999
nhs2011.on2011$income_on2011_t[nhs2011.on2011$income_on2011_t>=17 & nhs2011.on2011$income_on2011_t<=33] <- 3 #$60,000 or more
nhs2011.on2011$income_on2011_t[nhs2011.on2011$income_on2011_t==88] <- NA #not available

################################################################################
## RECODE VARIABLES: ONTARIO 2014
################################################################################

nhs2011.on2014 <- subset(nhs2011, nhs2011$pr == 35) #keep Ontario only

nhs2011.on2014$sex_on2014_t <- nhs2011.on2014$sex
nhs2011.on2014$age_on2014_t <- nhs2011.on2014$agegrp
nhs2011.on2014$education_on2014_t <- nhs2011.on2014$hdgree
nhs2011.on2014$income_on2014_t <- nhs2011.on2014$hhinc

# Sex

nhs2011.on2014$sex_on2014_t[nhs2011.on2014$sex_on2014_t==1] <- 1 #female
nhs2011.on2014$sex_on2014_t[nhs2011.on2014$sex_on2014_t==2] <- 2 #male

# Age

nhs2011.on2014$age_on2014_t[nhs2011.on2014$age_on2014_t==7] <- 1 #18-19
nhs2011.on2014$age_on2014_t[nhs2011.on2014$age_on2014_t==8] <- 2 #20-24
nhs2011.on2014$age_on2014_t[nhs2011.on2014$age_on2014_t==9] <- 3 #25-29
nhs2011.on2014$age_on2014_t[nhs2011.on2014$age_on2014_t==10] <- 4 #30-34
nhs2011.on2014$age_on2014_t[nhs2011.on2014$age_on2014_t==11] <- 5 #35-39
nhs2011.on2014$age_on2014_t[nhs2011.on2014$age_on2014_t==12] <- 6 #40-44
nhs2011.on2014$age_on2014_t[nhs2011.on2014$age_on2014_t==13] <- 7 #45-49
nhs2011.on2014$age_on2014_t[nhs2011.on2014$age_on2014_t==14] <- 8 #50-54
nhs2011.on2014$age_on2014_t[nhs2011.on2014$age_on2014_t==15] <- 9 #55-59
nhs2011.on2014$age_on2014_t[nhs2011.on2014$age_on2014_t==16] <- 10 #60-64
nhs2011.on2014$age_on2014_t[nhs2011.on2014$age_on2014_t==17] <- 11 #65-69
nhs2011.on2014$age_on2014_t[nhs2011.on2014$age_on2014_t==18] <- 12 #70-74
nhs2011.on2014$age_on2014_t[nhs2011.on2014$age_on2014_t==19] <- 13 #75-79
nhs2011.on2014$age_on2014_t[nhs2011.on2014$age_on2014_t==20] <- 14 #80-84
nhs2011.on2014$age_on2014_t[nhs2011.on2014$age_on2014_t==21] <- 15 #85+
nhs2011.on2014$age_on2014_t[nhs2011.on2014$age_on2014_t==88] <- NA #not available

# Education

nhs2011.on2014$education_on2014_t[nhs2011.on2014$education_on2014_t==1] <- 1 #below high school
nhs2011.on2014$education_on2014_t[nhs2011.on2014$education_on2014_t==2] <- 2 #high school
nhs2011.on2014$education_on2014_t[nhs2011.on2014$education_on2014_t>=3 & nhs2011.on2014$education_on2014_t<=8] <- 3 #postsecondary below bachelor
nhs2011.on2014$education_on2014_t[nhs2011.on2014$education_on2014_t>=9 & nhs2011.on2014$education_on2014_t<=13] <- 4 #university degree
nhs2011.on2014$education_on2014_t[nhs2011.on2014$education_on2014_t==88] <- NA #not available
nhs2011.on2014$education_on2014_t[nhs2011.on2014$education_on2014_t==99] <- NA #not applicable

# Household income before tax

nhs2011.on2014$income_on2014_t[nhs2011.on2014$income_on2014_t>=1 & nhs2011.on2014$income_on2014_t<=2] <- 1 #under $5,000
nhs2011.on2014$income_on2014_t[nhs2011.on2014$income_on2014_t>=3 & nhs2011.on2014$income_on2014_t<=4] <- 2 #$5,000-$9,999
nhs2011.on2014$income_on2014_t[nhs2011.on2014$income_on2014_t>=5 & nhs2011.on2014$income_on2014_t<=6] <- 3 #$10,000-$14,999
nhs2011.on2014$income_on2014_t[nhs2011.on2014$income_on2014_t>=7 & nhs2011.on2014$income_on2014_t<=8] <- 4 #$15,000-$19,999
nhs2011.on2014$income_on2014_t[nhs2011.on2014$income_on2014_t==9] <- 5 #$20,000-$24,999
nhs2011.on2014$income_on2014_t[nhs2011.on2014$income_on2014_t==10] <- 6 #$25,000-$29,999
nhs2011.on2014$income_on2014_t[nhs2011.on2014$income_on2014_t==11] <- 7 #$30,000-$34,999
nhs2011.on2014$income_on2014_t[nhs2011.on2014$income_on2014_t==12] <- 8 #$35,000-$39,999
nhs2011.on2014$income_on2014_t[nhs2011.on2014$income_on2014_t==13] <- 9 #$40,000-$44,999
nhs2011.on2014$income_on2014_t[nhs2011.on2014$income_on2014_t==14] <- 10 #$45,000-$49,999
nhs2011.on2014$income_on2014_t[nhs2011.on2014$income_on2014_t==15] <- 11 #$50,000-$54,999
nhs2011.on2014$income_on2014_t[nhs2011.on2014$income_on2014_t==16] <- 12 #$55,000-$59,999
nhs2011.on2014$income_on2014_t[nhs2011.on2014$income_on2014_t==17] <- 13 #$60,000-$64,999
nhs2011.on2014$income_on2014_t[nhs2011.on2014$income_on2014_t==18] <- 14 #$65,000-$69,999
nhs2011.on2014$income_on2014_t[nhs2011.on2014$income_on2014_t==19] <- 15 #$70,000-$74,999
nhs2011.on2014$income_on2014_t[nhs2011.on2014$income_on2014_t==20] <- 16 #$75,000-$79,999
nhs2011.on2014$income_on2014_t[nhs2011.on2014$income_on2014_t>=21 & nhs2011.on2014$income_on2014_t<=22] <- 17 #$80,000-$89,999
nhs2011.on2014$income_on2014_t[nhs2011.on2014$income_on2014_t>=23 & nhs2011.on2014$income_on2014_t<=24] <- 18 #$90,000-$99,999
nhs2011.on2014$income_on2014_t[nhs2011.on2014$income_on2014_t>=25 & nhs2011.on2014$income_on2014_t<=29] <- 19 #$100,000-$149,999
nhs2011.on2014$income_on2014_t[nhs2011.on2014$income_on2014_t>=30 & nhs2011.on2014$income_on2014_t<=31] <- 20 #$150,000-$199,999
nhs2011.on2014$income_on2014_t[nhs2011.on2014$income_on2014_t==32] <- 21 #$200,000-$249,999
nhs2011.on2014$income_on2014_t[nhs2011.on2014$income_on2014_t==33] <- 22 #$250,000 or more
nhs2011.on2014$income_on2014_t[nhs2011.on2014$income_on2014_t==88] <- NA #not available

################################################################################
## RECODE VARIABLES: CANADA 2015
################################################################################

nhs2011.ca2015 <- nhs2011

nhs2011.ca2015$sex_ca2015_t <- nhs2011.ca2015$sex
nhs2011.ca2015$age_ca2015_t <- nhs2011.ca2015$agegrp
nhs2011.ca2015$education_ca2015_t <- nhs2011.ca2015$hdgree
nhs2011.ca2015$income_ca2015_t <- nhs2011.ca2015$hhinc
nhs2011.ca2015$province_ca2015_t <- nhs2011.ca2015$pr

# Sex

nhs2011.ca2015$sex_ca2015_t[nhs2011.ca2015$sex_ca2015_t==1] <- 1 #female
nhs2011.ca2015$sex_ca2015_t[nhs2011.ca2015$sex_ca2015_t==2] <- 2 #male

# Age

nhs2011.ca2015$age_ca2015_t[nhs2011.ca2015$age_ca2015_t==7] <- 1 #18-19
nhs2011.ca2015$age_ca2015_t[nhs2011.ca2015$age_ca2015_t==8] <- 2 #20-24
nhs2011.ca2015$age_ca2015_t[nhs2011.ca2015$age_ca2015_t==9] <- 3 #25-29
nhs2011.ca2015$age_ca2015_t[nhs2011.ca2015$age_ca2015_t==10] <- 4 #30-34
nhs2011.ca2015$age_ca2015_t[nhs2011.ca2015$age_ca2015_t==11] <- 5 #35-39
nhs2011.ca2015$age_ca2015_t[nhs2011.ca2015$age_ca2015_t==12] <- 6 #40-44
nhs2011.ca2015$age_ca2015_t[nhs2011.ca2015$age_ca2015_t==13] <- 7 #45-49
nhs2011.ca2015$age_ca2015_t[nhs2011.ca2015$age_ca2015_t==14] <- 8 #50-54
nhs2011.ca2015$age_ca2015_t[nhs2011.ca2015$age_ca2015_t==15] <- 9 #55-59
nhs2011.ca2015$age_ca2015_t[nhs2011.ca2015$age_ca2015_t==16] <- 10 #60-64
nhs2011.ca2015$age_ca2015_t[nhs2011.ca2015$age_ca2015_t==17] <- 11 #65-69
nhs2011.ca2015$age_ca2015_t[nhs2011.ca2015$age_ca2015_t==18] <- 12 #70-74
nhs2011.ca2015$age_ca2015_t[nhs2011.ca2015$age_ca2015_t==19] <- 13 #75-79
nhs2011.ca2015$age_ca2015_t[nhs2011.ca2015$age_ca2015_t==20] <- 14 #80-84
nhs2011.ca2015$age_ca2015_t[nhs2011.ca2015$age_ca2015_t==21] <- 15 #85+
nhs2011.ca2015$age_ca2015_t[nhs2011.ca2015$age_ca2015_t==88] <- NA #not available

# Education

nhs2011.ca2015$education_ca2015_t[nhs2011.ca2015$education_ca2015_t==1] <- 1 #below high school
nhs2011.ca2015$education_ca2015_t[nhs2011.ca2015$education_ca2015_t==2] <- 2 #high school
nhs2011.ca2015$education_ca2015_t[nhs2011.ca2015$education_ca2015_t>=3 & nhs2011.ca2015$education_ca2015_t<=8] <- 3 #postsecondary below bachelor
nhs2011.ca2015$education_ca2015_t[nhs2011.ca2015$education_ca2015_t>=9 & nhs2011.ca2015$education_ca2015_t<=11] <- 4 #university undergraduate degree
nhs2011.ca2015$education_ca2015_t[nhs2011.ca2015$education_ca2015_t>=12 & nhs2011.ca2015$education_ca2015_t<=13] <- 5 #university graduate degree
nhs2011.ca2015$education_ca2015_t[nhs2011.ca2015$education_ca2015_t==88] <- NA #not available
nhs2011.ca2015$education_ca2015_t[nhs2011.ca2015$education_ca2015_t==99] <- NA #not applicable

# Household income before tax

nhs2011.ca2015$income_ca2015_t[nhs2011.ca2015$income_ca2015_t>=1 & nhs2011.ca2015$income_ca2015_t<=8] <- 1 #under $20,000
nhs2011.ca2015$income_ca2015_t[nhs2011.ca2015$income_ca2015_t>=9 & nhs2011.ca2015$income_ca2015_t<=12] <- 2 #$20,000-$39,999
nhs2011.ca2015$income_ca2015_t[nhs2011.ca2015$income_ca2015_t>=13 & nhs2011.ca2015$income_ca2015_t<=16] <- 3 #$40,000-$59,999
nhs2011.ca2015$income_ca2015_t[nhs2011.ca2015$income_ca2015_t>=17 & nhs2011.ca2015$income_ca2015_t<=20] <- 4 #$60,000-$79,999
nhs2011.ca2015$income_ca2015_t[nhs2011.ca2015$income_ca2015_t>=21 & nhs2011.ca2015$income_ca2015_t<=24] <- 5 #$80,000-$99,999
nhs2011.ca2015$income_ca2015_t[nhs2011.ca2015$income_ca2015_t>=25 & nhs2011.ca2015$income_ca2015_t<=29] <- 6 #$100,000-$149,999
nhs2011.ca2015$income_ca2015_t[nhs2011.ca2015$income_ca2015_t>=30 & nhs2011.ca2015$income_ca2015_t<=31] <- 7 #$150,000-$199,999
nhs2011.ca2015$income_ca2015_t[nhs2011.ca2015$income_ca2015_t>=32 & nhs2011.ca2015$income_ca2015_t<=33] <- 8 #$200,000 or more
nhs2011.ca2015$income_ca2015_t[nhs2011.ca2015$income_ca2015_t==88] <- NA #not available

# Province

nhs2011.ca2015$province_ca2015_t[nhs2011.ca2015$province_ca2015_t==10] <- 1 #Newfoundland and Labrador
nhs2011.ca2015$province_ca2015_t[nhs2011.ca2015$province_ca2015_t==11] <- 2 #Prince Edward Island
nhs2011.ca2015$province_ca2015_t[nhs2011.ca2015$province_ca2015_t==12] <- 3 #Nova Scotia
nhs2011.ca2015$province_ca2015_t[nhs2011.ca2015$province_ca2015_t==13] <- 4 #New Brunswick
nhs2011.ca2015$province_ca2015_t[nhs2011.ca2015$province_ca2015_t==24] <- 5 #Quebec
nhs2011.ca2015$province_ca2015_t[nhs2011.ca2015$province_ca2015_t==35] <- 6 #Ontario
nhs2011.ca2015$province_ca2015_t[nhs2011.ca2015$province_ca2015_t==46] <- 7 #Manitoba
nhs2011.ca2015$province_ca2015_t[nhs2011.ca2015$province_ca2015_t==47] <- 8 #Saskatchewan
nhs2011.ca2015$province_ca2015_t[nhs2011.ca2015$province_ca2015_t==48] <- 9 #Alberta
nhs2011.ca2015$province_ca2015_t[nhs2011.ca2015$province_ca2015_t==59] <- 10 #British Columbia
nhs2011.ca2015$province_ca2015_t[nhs2011.ca2015$province_ca2015_t==60] <- NA #Northern Canada (missing from Ipsos/LPP data)

################################################################################
## RECODE VARIABLES: CANADA 2019 (2016 CENSUS)
################################################################################

pum2016.ca2019 <- pum2016

pum2016.ca2019$sex_ca2019_t <- pum2016.ca2019$sex
pum2016.ca2019$age_ca2019_t <- pum2016.ca2019$agegrp
pum2016.ca2019$education_ca2019_t <- pum2016.ca2019$hdgree
pum2016.ca2019$income_ca2019_t <- pum2016.ca2019$hhinc
pum2016.ca2019$province_ca2019_t <- pum2016.ca2019$pr

# Sex

pum2016.ca2019$sex_ca2019_t[pum2016.ca2019$sex_ca2019_t==1] <- 1 #female
pum2016.ca2019$sex_ca2019_t[pum2016.ca2019$sex_ca2019_t==2] <- 2 #male

# Age

pum2016.ca2019$age_ca2019_t[pum2016.ca2019$age_ca2019_t==7] <- 1 #18-19
pum2016.ca2019$age_ca2019_t[pum2016.ca2019$age_ca2019_t==8] <- 2 #20-24
pum2016.ca2019$age_ca2019_t[pum2016.ca2019$age_ca2019_t==9] <- 3 #25-29
pum2016.ca2019$age_ca2019_t[pum2016.ca2019$age_ca2019_t==10] <- 4 #30-34
pum2016.ca2019$age_ca2019_t[pum2016.ca2019$age_ca2019_t==11] <- 5 #35-39
pum2016.ca2019$age_ca2019_t[pum2016.ca2019$age_ca2019_t==12] <- 6 #40-44
pum2016.ca2019$age_ca2019_t[pum2016.ca2019$age_ca2019_t==13] <- 7 #45-49
pum2016.ca2019$age_ca2019_t[pum2016.ca2019$age_ca2019_t==14] <- 8 #50-54
pum2016.ca2019$age_ca2019_t[pum2016.ca2019$age_ca2019_t==15] <- 9 #55-59
pum2016.ca2019$age_ca2019_t[pum2016.ca2019$age_ca2019_t==16] <- 10 #60-64
pum2016.ca2019$age_ca2019_t[pum2016.ca2019$age_ca2019_t==17] <- 11 #65-69
pum2016.ca2019$age_ca2019_t[pum2016.ca2019$age_ca2019_t==18] <- 12 #70-74
pum2016.ca2019$age_ca2019_t[pum2016.ca2019$age_ca2019_t==19] <- 13 #75-79
pum2016.ca2019$age_ca2019_t[pum2016.ca2019$age_ca2019_t==20] <- 14 #80-84
pum2016.ca2019$age_ca2019_t[pum2016.ca2019$age_ca2019_t==21] <- 15 #85+
pum2016.ca2019$age_ca2019_t[pum2016.ca2019$age_ca2019_t==88] <- NA #not available

# Education

pum2016.ca2019$education_ca2019_t[pum2016.ca2019$education_ca2019_t==1] <- 1 #below high school
pum2016.ca2019$education_ca2019_t[pum2016.ca2019$education_ca2019_t==2] <- 2 #high school
pum2016.ca2019$education_ca2019_t[pum2016.ca2019$education_ca2019_t>=3 & pum2016.ca2019$education_ca2019_t<=8] <- 3 #postsecondary below bachelor
pum2016.ca2019$education_ca2019_t[pum2016.ca2019$education_ca2019_t>=9 & pum2016.ca2019$education_ca2019_t<=11] <- 4 #university undergraduate degree
pum2016.ca2019$education_ca2019_t[pum2016.ca2019$education_ca2019_t>=12 & pum2016.ca2019$education_ca2019_t<=13] <- 5 #university graduate degree
pum2016.ca2019$education_ca2019_t[pum2016.ca2019$education_ca2019_t==88] <- NA #not available
pum2016.ca2019$education_ca2019_t[pum2016.ca2019$education_ca2019_t==99] <- NA #not applicable

# Household income before tax

pum2016.ca2019$income_ca2019_t[pum2016.ca2019$income_ca2019_t>=1 & pum2016.ca2019$income_ca2019_t<=10] <- 1 #under $30,000
pum2016.ca2019$income_ca2019_t[pum2016.ca2019$income_ca2019_t>=11 & pum2016.ca2019$income_ca2019_t<=16] <- 2 #$30,000-$59,999
pum2016.ca2019$income_ca2019_t[pum2016.ca2019$income_ca2019_t>=17 & pum2016.ca2019$income_ca2019_t<=22] <- 3 #$60,000-$89,999
pum2016.ca2019$income_ca2019_t[pum2016.ca2019$income_ca2019_t>=23 & pum2016.ca2019$income_ca2019_t<=25] <- 4 #$90,000-$109,999
pum2016.ca2019$income_ca2019_t[pum2016.ca2019$income_ca2019_t>=26 & pum2016.ca2019$income_ca2019_t<=29] <- 5 #$110,000-$149,999
pum2016.ca2019$income_ca2019_t[pum2016.ca2019$income_ca2019_t>=30 & pum2016.ca2019$income_ca2019_t<=31] <- 6 #$150,000-$199,999
pum2016.ca2019$income_ca2019_t[pum2016.ca2019$income_ca2019_t>=32 & pum2016.ca2019$income_ca2019_t<=33] <- 7 #$200,000 or more
pum2016.ca2019$income_ca2019_t[pum2016.ca2019$income_ca2019_t==88] <- NA #not available

# Province

pum2016.ca2019$province_ca2019_t[pum2016.ca2019$province_ca2019_t==10] <- 1 #Newfoundland and Labrador
pum2016.ca2019$province_ca2019_t[pum2016.ca2019$province_ca2019_t==11] <- 2 #Prince Edward Island
pum2016.ca2019$province_ca2019_t[pum2016.ca2019$province_ca2019_t==12] <- 3 #Nova Scotia
pum2016.ca2019$province_ca2019_t[pum2016.ca2019$province_ca2019_t==13] <- 4 #New Brunswick
pum2016.ca2019$province_ca2019_t[pum2016.ca2019$province_ca2019_t==24] <- 5 #Quebec
pum2016.ca2019$province_ca2019_t[pum2016.ca2019$province_ca2019_t==35] <- 6 #Ontario
pum2016.ca2019$province_ca2019_t[pum2016.ca2019$province_ca2019_t==46] <- 7 #Manitoba
pum2016.ca2019$province_ca2019_t[pum2016.ca2019$province_ca2019_t==47] <- 8 #Saskatchewan
pum2016.ca2019$province_ca2019_t[pum2016.ca2019$province_ca2019_t==48] <- 9 #Alberta
pum2016.ca2019$province_ca2019_t[pum2016.ca2019$province_ca2019_t==59] <- 10 #British Columbia
pum2016.ca2019$province_ca2019_t[pum2016.ca2019$province_ca2019_t==70] <- 11 #Northern Canada

################################################################################
## RECODE VARIABLES: CANADA 2019 (2021 CENSUS)
################################################################################

pum2021.ca2019 <- pum2021

pum2021.ca2019$sex_ca2019_t <- pum2021.ca2019$Gender
pum2021.ca2019$age_ca2019_t <- pum2021.ca2019$agegrp
pum2021.ca2019$education_ca2019_t <- pum2021.ca2019$hdgree
pum2021.ca2019$income_ca2019_t <- pum2021.ca2019$HHInc
pum2021.ca2019$province_ca2019_t <- pum2021.ca2019$pr

# Sex

pum2021.ca2019$sex_ca2019_t[pum2021.ca2019$sex_ca2019_t==1] <- 1 #female
pum2021.ca2019$sex_ca2019_t[pum2021.ca2019$sex_ca2019_t==2] <- 2 #male

# Age

pum2021.ca2019$age_ca2019_t[pum2021.ca2019$age_ca2019_t==7] <- 1 #18-19
pum2021.ca2019$age_ca2019_t[pum2021.ca2019$age_ca2019_t==8] <- 2 #20-24
pum2021.ca2019$age_ca2019_t[pum2021.ca2019$age_ca2019_t==9] <- 3 #25-29
pum2021.ca2019$age_ca2019_t[pum2021.ca2019$age_ca2019_t==10] <- 4 #30-34
pum2021.ca2019$age_ca2019_t[pum2021.ca2019$age_ca2019_t==11] <- 5 #35-39
pum2021.ca2019$age_ca2019_t[pum2021.ca2019$age_ca2019_t==12] <- 6 #40-44
pum2021.ca2019$age_ca2019_t[pum2021.ca2019$age_ca2019_t==13] <- 7 #45-49
pum2021.ca2019$age_ca2019_t[pum2021.ca2019$age_ca2019_t==14] <- 8 #50-54
pum2021.ca2019$age_ca2019_t[pum2021.ca2019$age_ca2019_t==15] <- 9 #55-59
pum2021.ca2019$age_ca2019_t[pum2021.ca2019$age_ca2019_t==16] <- 10 #60-64
pum2021.ca2019$age_ca2019_t[pum2021.ca2019$age_ca2019_t==17] <- 11 #65-69
pum2021.ca2019$age_ca2019_t[pum2021.ca2019$age_ca2019_t==18] <- 12 #70-74
pum2021.ca2019$age_ca2019_t[pum2021.ca2019$age_ca2019_t==19] <- 13 #75-79
pum2021.ca2019$age_ca2019_t[pum2021.ca2019$age_ca2019_t==20] <- 14 #80-84
pum2021.ca2019$age_ca2019_t[pum2021.ca2019$age_ca2019_t==21] <- 15 #85+
pum2021.ca2019$age_ca2019_t[pum2021.ca2019$age_ca2019_t==88] <- NA #not available

# Education

pum2021.ca2019$education_ca2019_t[pum2021.ca2019$education_ca2019_t==1] <- 1 #below high school
pum2021.ca2019$education_ca2019_t[pum2021.ca2019$education_ca2019_t==2] <- 2 #high school
pum2021.ca2019$education_ca2019_t[pum2021.ca2019$education_ca2019_t>=3 & pum2021.ca2019$education_ca2019_t<=8] <- 3 #postsecondary below bachelor
pum2021.ca2019$education_ca2019_t[pum2021.ca2019$education_ca2019_t>=9 & pum2021.ca2019$education_ca2019_t<=11] <- 4 #university undergraduate degree
pum2021.ca2019$education_ca2019_t[pum2021.ca2019$education_ca2019_t>=12 & pum2021.ca2019$education_ca2019_t<=13] <- 5 #university graduate degree
pum2021.ca2019$education_ca2019_t[pum2021.ca2019$education_ca2019_t==88] <- NA #not available
pum2021.ca2019$education_ca2019_t[pum2021.ca2019$education_ca2019_t==99] <- NA #not applicable

# Household income before tax

pum2021.ca2019$income_ca2019_t[pum2021.ca2019$income_ca2019_t>=1 & pum2021.ca2019$income_ca2019_t<=10] <- 1 #under $30,000
pum2021.ca2019$income_ca2019_t[pum2021.ca2019$income_ca2019_t>=11 & pum2021.ca2019$income_ca2019_t<=16] <- 2 #$30,000-$59,999
pum2021.ca2019$income_ca2019_t[pum2021.ca2019$income_ca2019_t>=17 & pum2021.ca2019$income_ca2019_t<=22] <- 3 #$60,000-$89,999
pum2021.ca2019$income_ca2019_t[pum2021.ca2019$income_ca2019_t>=23 & pum2021.ca2019$income_ca2019_t<=25] <- 4 #$90,000-$109,999
pum2021.ca2019$income_ca2019_t[pum2021.ca2019$income_ca2019_t>=26 & pum2021.ca2019$income_ca2019_t<=29] <- 5 #$110,000-$149,999
pum2021.ca2019$income_ca2019_t[pum2021.ca2019$income_ca2019_t>=30 & pum2021.ca2019$income_ca2019_t<=31] <- 6 #$150,000-$199,999
pum2021.ca2019$income_ca2019_t[pum2021.ca2019$income_ca2019_t>=32 & pum2021.ca2019$income_ca2019_t<=33] <- 7 #$200,000 or more
pum2021.ca2019$income_ca2019_t[pum2021.ca2019$income_ca2019_t==88] <- NA #not available

# Province

pum2021.ca2019$province_ca2019_t[pum2021.ca2019$province_ca2019_t==10] <- 1 #Newfoundland and Labrador
pum2021.ca2019$province_ca2019_t[pum2021.ca2019$province_ca2019_t==11] <- 2 #Prince Edward Island
pum2021.ca2019$province_ca2019_t[pum2021.ca2019$province_ca2019_t==12] <- 3 #Nova Scotia
pum2021.ca2019$province_ca2019_t[pum2021.ca2019$province_ca2019_t==13] <- 4 #New Brunswick
pum2021.ca2019$province_ca2019_t[pum2021.ca2019$province_ca2019_t==24] <- 5 #Quebec
pum2021.ca2019$province_ca2019_t[pum2021.ca2019$province_ca2019_t==35] <- 6 #Ontario
pum2021.ca2019$province_ca2019_t[pum2021.ca2019$province_ca2019_t==46] <- 7 #Manitoba
pum2021.ca2019$province_ca2019_t[pum2021.ca2019$province_ca2019_t==47] <- 8 #Saskatchewan
pum2021.ca2019$province_ca2019_t[pum2021.ca2019$province_ca2019_t==48] <- 9 #Alberta
pum2021.ca2019$province_ca2019_t[pum2021.ca2019$province_ca2019_t==59] <- 10 #British Columbia
pum2021.ca2019$province_ca2019_t[pum2021.ca2019$province_ca2019_t==70] <- 11 #Northern Canada

################################################################################
## RECODE VARIABLES: QUEBEC 2022 (2016 CENSUS)
################################################################################

pum2016.qc2022 <- subset(pum2016, pum2016$pr == 24) #keep Quebec only

pum2016.qc2022$sex_qc2022_t <- pum2016.qc2022$sex
pum2016.qc2022$age_qc2022_t <- pum2016.qc2022$agegrp
pum2016.qc2022$education_qc2022_t <- pum2016.qc2022$hdgree
pum2016.qc2022$income_qc2022_t <- pum2016.qc2022$hhinc

# Sex

pum2016.qc2022$sex_qc2022_t[pum2016.qc2022$sex_qc2022_t==1] <- 1 #female
pum2016.qc2022$sex_qc2022_t[pum2016.qc2022$sex_qc2022_t==2] <- 2 #male

# Age

pum2016.qc2022$age_qc2022_t[pum2016.qc2022$age_qc2022_t==7] <- 1 #18-19
pum2016.qc2022$age_qc2022_t[pum2016.qc2022$age_qc2022_t==8] <- 2 #20-24
pum2016.qc2022$age_qc2022_t[pum2016.qc2022$age_qc2022_t==9] <- 3 #25-29
pum2016.qc2022$age_qc2022_t[pum2016.qc2022$age_qc2022_t==10] <- 4 #30-34
pum2016.qc2022$age_qc2022_t[pum2016.qc2022$age_qc2022_t==11] <- 5 #35-39
pum2016.qc2022$age_qc2022_t[pum2016.qc2022$age_qc2022_t==12] <- 6 #40-44
pum2016.qc2022$age_qc2022_t[pum2016.qc2022$age_qc2022_t==13] <- 7 #45-49
pum2016.qc2022$age_qc2022_t[pum2016.qc2022$age_qc2022_t==14] <- 8 #50-54
pum2016.qc2022$age_qc2022_t[pum2016.qc2022$age_qc2022_t==15] <- 9 #55-59
pum2016.qc2022$age_qc2022_t[pum2016.qc2022$age_qc2022_t==16] <- 10 #60-64
pum2016.qc2022$age_qc2022_t[pum2016.qc2022$age_qc2022_t==17] <- 11 #65-69
pum2016.qc2022$age_qc2022_t[pum2016.qc2022$age_qc2022_t==18] <- 12 #70-74
pum2016.qc2022$age_qc2022_t[pum2016.qc2022$age_qc2022_t==19] <- 13 #75-79
pum2016.qc2022$age_qc2022_t[pum2016.qc2022$age_qc2022_t==20] <- 14 #80-84
pum2016.qc2022$age_qc2022_t[pum2016.qc2022$age_qc2022_t==21] <- 15 #85+
pum2016.qc2022$age_qc2022_t[pum2016.qc2022$age_qc2022_t==88] <- NA #not available

# Education

pum2016.qc2022$education_qc2022_t[pum2016.qc2022$education_qc2022_t==1] <- 1 #below high school
pum2016.qc2022$education_qc2022_t[pum2016.qc2022$education_qc2022_t==2] <- 2 #high school
pum2016.qc2022$education_qc2022_t[pum2016.qc2022$education_qc2022_t>=3 & pum2016.qc2022$education_qc2022_t<=8] <- 3 #postsecondary below bachelor
pum2016.qc2022$education_qc2022_t[pum2016.qc2022$education_qc2022_t>=9 & pum2016.qc2022$education_qc2022_t<=11] <- 4 #university undergraduate degree
pum2016.qc2022$education_qc2022_t[pum2016.qc2022$education_qc2022_t>=12 & pum2016.qc2022$education_qc2022_t<=13] <- 5 #university graduate degree
pum2016.qc2022$education_qc2022_t[pum2016.qc2022$education_qc2022_t==88] <- NA #not available
pum2016.qc2022$education_qc2022_t[pum2016.qc2022$education_qc2022_t==99] <- NA #not applicable

# Household income before tax

pum2016.qc2022$income_qc2022_t[pum2016.qc2022$income_qc2022_t>=1 & pum2016.qc2022$income_qc2022_t<=10] <- 1 #under $30,000
pum2016.qc2022$income_qc2022_t[pum2016.qc2022$income_qc2022_t>=11 & pum2016.qc2022$income_qc2022_t<=16] <- 2 #$30,000-$59,999
pum2016.qc2022$income_qc2022_t[pum2016.qc2022$income_qc2022_t>=17 & pum2016.qc2022$income_qc2022_t<=22] <- 3 #$60,000-$89,999
pum2016.qc2022$income_qc2022_t[pum2016.qc2022$income_qc2022_t>=23 & pum2016.qc2022$income_qc2022_t<=25] <- 4 #$90,000-$109,999
pum2016.qc2022$income_qc2022_t[pum2016.qc2022$income_qc2022_t>=26 & pum2016.qc2022$income_qc2022_t<=29] <- 5 #$110,000-$149,999
pum2016.qc2022$income_qc2022_t[pum2016.qc2022$income_qc2022_t>=30 & pum2016.qc2022$income_qc2022_t<=31] <- 6 #$150,000-$199,999
pum2016.qc2022$income_qc2022_t[pum2016.qc2022$income_qc2022_t>=32 & pum2016.qc2022$income_qc2022_t<=33] <- 7 #$200,000 or more
pum2016.qc2022$income_qc2022_t[pum2016.qc2022$income_qc2022_t==88] <- NA #not available

################################################################################
## RECODE VARIABLES: QUEBEC 2022 (2021 CENSUS)
################################################################################

pum2021.qc2022 <- subset(pum2021, pum2021$pr == 24) #keep Quebec only

pum2021.qc2022$sex_qc2022_t <- pum2021.qc2022$Gender
pum2021.qc2022$age_qc2022_t <- pum2021.qc2022$agegrp
pum2021.qc2022$education_qc2022_t <- pum2021.qc2022$hdgree
pum2021.qc2022$income_qc2022_t <- pum2021.qc2022$HHInc

# Sex

pum2021.qc2022$sex_qc2022_t[pum2021.qc2022$sex_qc2022_t==1] <- 1 #female
pum2021.qc2022$sex_qc2022_t[pum2021.qc2022$sex_qc2022_t==2] <- 2 #male

# Age

pum2021.qc2022$age_qc2022_t[pum2021.qc2022$age_qc2022_t==7] <- 1 #18-19
pum2021.qc2022$age_qc2022_t[pum2021.qc2022$age_qc2022_t==8] <- 2 #20-24
pum2021.qc2022$age_qc2022_t[pum2021.qc2022$age_qc2022_t==9] <- 3 #25-29
pum2021.qc2022$age_qc2022_t[pum2021.qc2022$age_qc2022_t==10] <- 4 #30-34
pum2021.qc2022$age_qc2022_t[pum2021.qc2022$age_qc2022_t==11] <- 5 #35-39
pum2021.qc2022$age_qc2022_t[pum2021.qc2022$age_qc2022_t==12] <- 6 #40-44
pum2021.qc2022$age_qc2022_t[pum2021.qc2022$age_qc2022_t==13] <- 7 #45-49
pum2021.qc2022$age_qc2022_t[pum2021.qc2022$age_qc2022_t==14] <- 8 #50-54
pum2021.qc2022$age_qc2022_t[pum2021.qc2022$age_qc2022_t==15] <- 9 #55-59
pum2021.qc2022$age_qc2022_t[pum2021.qc2022$age_qc2022_t==16] <- 10 #60-64
pum2021.qc2022$age_qc2022_t[pum2021.qc2022$age_qc2022_t==17] <- 11 #65-69
pum2021.qc2022$age_qc2022_t[pum2021.qc2022$age_qc2022_t==18] <- 12 #70-74
pum2021.qc2022$age_qc2022_t[pum2021.qc2022$age_qc2022_t==19] <- 13 #75-79
pum2021.qc2022$age_qc2022_t[pum2021.qc2022$age_qc2022_t==20] <- 14 #80-84
pum2021.qc2022$age_qc2022_t[pum2021.qc2022$age_qc2022_t==21] <- 15 #85+
pum2021.qc2022$age_qc2022_t[pum2021.qc2022$age_qc2022_t==88] <- NA #not available

# Education

pum2021.qc2022$education_qc2022_t[pum2021.qc2022$education_qc2022_t==1] <- 1 #below high school
pum2021.qc2022$education_qc2022_t[pum2021.qc2022$education_qc2022_t==2] <- 2 #high school
pum2021.qc2022$education_qc2022_t[pum2021.qc2022$education_qc2022_t>=3 & pum2021.qc2022$education_qc2022_t<=8] <- 3 #postsecondary below bachelor
pum2021.qc2022$education_qc2022_t[pum2021.qc2022$education_qc2022_t>=9 & pum2021.qc2022$education_qc2022_t<=11] <- 4 #university undergraduate degree
pum2021.qc2022$education_qc2022_t[pum2021.qc2022$education_qc2022_t>=12 & pum2021.qc2022$education_qc2022_t<=13] <- 5 #university graduate degree
pum2021.qc2022$education_qc2022_t[pum2021.qc2022$education_qc2022_t==88] <- NA #not available
pum2021.qc2022$education_qc2022_t[pum2021.qc2022$education_qc2022_t==99] <- NA #not applicable

# Household income before tax

pum2021.qc2022$income_qc2022_t[pum2021.qc2022$income_qc2022_t>=1 & pum2021.qc2022$income_qc2022_t<=10] <- 1 #under $30,000
pum2021.qc2022$income_qc2022_t[pum2021.qc2022$income_qc2022_t>=11 & pum2021.qc2022$income_qc2022_t<=16] <- 2 #$30,000-$59,999
pum2021.qc2022$income_qc2022_t[pum2021.qc2022$income_qc2022_t>=17 & pum2021.qc2022$income_qc2022_t<=22] <- 3 #$60,000-$89,999
pum2021.qc2022$income_qc2022_t[pum2021.qc2022$income_qc2022_t>=23 & pum2021.qc2022$income_qc2022_t<=25] <- 4 #$90,000-$109,999
pum2021.qc2022$income_qc2022_t[pum2021.qc2022$income_qc2022_t>=26 & pum2021.qc2022$income_qc2022_t<=29] <- 5 #$110,000-$149,999
pum2021.qc2022$income_qc2022_t[pum2021.qc2022$income_qc2022_t>=30 & pum2021.qc2022$income_qc2022_t<=31] <- 6 #$150,000-$199,999
pum2021.qc2022$income_qc2022_t[pum2021.qc2022$income_qc2022_t>=32 & pum2021.qc2022$income_qc2022_t<=33] <- 7 #$200,000 or more
pum2021.qc2022$income_qc2022_t[pum2021.qc2022$income_qc2022_t==88] <- NA #not available

################################################################################
## SURVEY DESIGN: CANADA 2011
################################################################################

# Create survey design object using 2011 NHS and weights

svy.nhs2011.ca2011 <- svydesign(ids=~1, data=nhs2011.ca2011, weights=nhs2011.ca2011$weight) 

# Create targets

sex_ca2011_w <- svytable(~sex_ca2011_t, design=svy.nhs2011.ca2011) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

age_ca2011_w <- svytable(~age_ca2011_t, design=svy.nhs2011.ca2011) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

education_ca2011_w <- svytable(~education_ca2011_t, design=svy.nhs2011.ca2011) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

income_ca2011_w <- svytable(~income_ca2011_t, design=svy.nhs2011.ca2011) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

province_ca2011_w <- svytable(~province_ca2011_t, design=svy.nhs2011.ca2011) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

# Create target vectors

sex_ca2011_w <- c(0.514, 0.486)
age_ca2011_w <- c(0.033, 0.083, 0.083, 0.083, 0.083, 0.089, 0.102, 0.102, 
                  0.090, 0.079, 0.058, 0.043, 0.033, 0.023, 0.016)
income_ca2011_w  <- c(0.035, 0.022, 0.033, 0.030, 0.037, 0.039, 0.040, 
                      0.041, 0.081, 0.039, 0.075, 0.070, 0.122, 0.191, 0.146)
education_ca2011_w  <- c(0.170, 0.262, 0.350, 0.169, 0.049)
province_ca2011_w  <- c(0.016, 0.004, 0.028, 0.023, 0.239, 0.383, 0.035, 
                        0.030, 0.106, 0.134, 0.003)

# Combine the individual vectors into a list

targets.ca2011 <- list(sex_ca2011_w, age_ca2011_w, education_ca2011_w, province_ca2011_w, income_ca2011_w)
names(targets.ca2011) <- c("sex_ca2011_w", "age_ca2011_w", "education_ca2011_w", "province_ca2011_w", "income_ca2011_w")

################################################################################
## SURVEY DESIGN: ONTARIO 2011
################################################################################

# Create survey design object using 2011 NHS and weights

svy.nhs2011.on2011 <- svydesign(ids=~1, data=nhs2011.on2011, weights=nhs2011.on2011$weight) 

# Create targets

sex_on2011_w <- svytable(~sex_on2011_t, design=svy.nhs2011.on2011) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

age_on2011_w <- svytable(~age_on2011_t, design=svy.nhs2011.on2011) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

education_on2011_w <- svytable(~education_on2011_t, design=svy.nhs2011.on2011) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

income_on2011_w <- svytable(~income_on2011_t, design=svy.nhs2011.on2011) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

# Create target vectors

sex_on2011_w <- c(0.519, 0.481)
age_on2011_w <- c(0.119, 0.160, 0.175, 0.206, 0.162, 0.178)
education_on2011_w  <- c(0.153, 0.276, 0.326, 0.188, 0.058)
income_on2011_w  <- c(0.038, 0.320, 0.642)

# Combine the individual vectors into a list

targets.on2011 <- list(sex_on2011_w, age_on2011_w, education_on2011_w, income_on2011_w)
names(targets.on2011) <- c("sex_on2011_w", "age_on2011_w", "education_on2011_w", "income_on2011_w")

################################################################################
## SURVEY DESIGN: ONTARIO 2014
################################################################################

# Create survey design object using 2011 NHS and weights

svy.nhs2011.on2014 <- svydesign(ids=~1, data=nhs2011.on2014, weights=nhs2011.on2014$weight) 

# Create targets

sex_on2014_w <- svytable(~sex_on2014_t, design=svy.nhs2011.on2014) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

age_on2014_w <- svytable(~age_on2014_t, design=svy.nhs2011.on2014) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

education_on2014_w <- svytable(~education_on2014_t, design=svy.nhs2011.on2014) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

income_on2014_w <- svytable(~income_on2014_t, design=svy.nhs2011.on2014) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

# Create target vectors

sex_on2014_w <- c(0.519, 0.481)
age_on2014_w <- c(0.035, 0.085, 0.081, 0.079, 0.084, 0.092, 0.105, 0.101, 
                  0.086, 0.077, 0.056, 0.043, 0.034, 0.024, 0.017)
education_on2014_w  <- c(0.153, 0.276, 0.326, 0.246)
income_on2014_w  <- c(0.019, 0.012, 0.019, 0.026, 0.028, 0.032, 0.034, 
                      0.036, 0.038, 0.038, 0.038, 0.037, 0.036, 0.036, 
                      0.034, 0.034, 0.064, 0.059, 0.206, 0.098, 0.037, 0.039)

# Combine the individual vectors into a list

targets.on2014 <- list(sex_on2014_w, age_on2014_w, education_on2014_w, income_on2014_w)
names(targets.on2014) <- c("sex_on2014_w", "age_on2014_w", "education_on2014_w", "income_on2014_w")

################################################################################
## SURVEY DESIGN: CANADA 2015
################################################################################

# Create survey design object using 2011 NHS and weights

svy.nhs2011.ca2015 <- svydesign(ids=~1, data=nhs2011.ca2015, weights=nhs2011.ca2015$weight) 

# Create targets

sex_ca2015_w <- svytable(~sex_ca2015_t, design=svy.nhs2011.ca2015) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

age_ca2015_w <- svytable(~age_ca2015_t, design=svy.nhs2011.ca2015) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

education_ca2015_w <- svytable(~education_ca2015_t, design=svy.nhs2011.ca2015) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

income_ca2015_w <- svytable(~income_ca2015_t, design=svy.nhs2011.ca2015) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

province_ca2015_w <- svytable(~province_ca2015_t, design=svy.nhs2011.ca2015) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

# Create target vectors

sex_ca2015_w <- c(0.514, 0.486)
age_ca2015_w <- c(0.033, 0.083, 0.083, 0.083, 0.083, 0.089, 0.102, 0.102, 
                  0.090, 0.079, 0.058, 0.043, 0.033, 0.023, 0.016)
education_ca2015_w  <- c(0.170, 0.262, 0.350, 0.169, 0.049)
income_ca2015_w  <- c(0.089, 0.146, 0.160, 0.145, 0.122, 0.191, 0.082, 0.063)
province_ca2015_w  <- c(0.016, 0.004, 0.029, 0.023, 0.239, 0.384, 0.035, 
                        0.030, 0.106, 0.135)

# Combine the individual vectors into a list

targets.ca2015 <- list(sex_ca2015_w, age_ca2015_w, education_ca2015_w, income_ca2015_w, province_ca2015_w)
names(targets.ca2015) <- c("sex_ca2015_w", "age_ca2015_w", "education_ca2015_w", "income_ca2015_w", "province_ca2015_w")

################################################################################
## SURVEY DESIGN: CANADA 2019 (CENSUS 2016)
################################################################################

# Create survey design object using 2016 Census PUM and weights

svy.pum2016.ca2019 <- svydesign(ids=~1, data=pum2016.ca2019, weights=pum2016.ca2019$weight) 

# Create targets

sex_ca2019_w <- svytable(~sex_ca2019_t, design=svy.pum2016.ca2019) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

age_ca2019_w <- svytable(~age_ca2019_t, design=svy.pum2016.ca2019) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

education_ca2019_w <- svytable(~education_ca2019_t, design=svy.pum2016.ca2019) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

income_ca2019_w <- svytable(~income_ca2019_t, design=svy.pum2016.ca2019) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

province_ca2019_w <- svytable(~province_ca2019_t, design=svy.pum2016.ca2019) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

# Create target vectors

sex_ca2019_w <- c(0.513, 0.487)
age_ca2019_w <- c(0.030, 0.081, 0.083, 0.085, 0.083, 0.081, 0.085, 0.098, 
                  0.095, 0.082, 0.070, 0.050, 0.035, 0.024, 0.018)
education_ca2019_w  <- c(0.154, 0.275, 0.331, 0.185, 0.056)
income_ca2019_w  <- c(0.117, 0.212, 0.200, 0.112, 0.159, 0.105, 0.094)
province_ca2019_w  <- c(0.015, 0.004, 0.027, 0.022, 0.232, 0.385, 
                        0.035, 0.030, 0.112, 0.135, 0.003)

# Combine the individual vectors into a list

targets.ca2019 <- list(sex_ca2019_w, age_ca2019_w, education_ca2019_w, province_ca2019_w, income_ca2019_w)
names(targets.ca2019) <- c("sex_ca2019_w", "age_ca2019_w", "education_ca2019_w", "province_ca2019_w", "income_ca2019_w")

################################################################################
## SURVEY DESIGN: CANADA 2019 (CENSUS 2021)
################################################################################

# Create survey design object using 2021 Census PUM and weights

svy.pum2021.ca2019 <- svydesign(ids=~1, data=pum2021.ca2019, weights=pum2021.ca2019$weight) 

# Create targets

sex_ca2019_w <- svytable(~sex_ca2019_t, design=svy.pum2021.ca2019) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

age_ca2019_w <- svytable(~age_ca2019_t, design=svy.pum2021.ca2019) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

education_ca2019_w <- svytable(~education_ca2019_t, design=svy.pum2021.ca2019) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

income_ca2019_w <- svytable(~income_ca2019_t, design=svy.pum2021.ca2019) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

province_ca2019_w <- svytable(~province_ca2019_t, design=svy.pum2021.ca2019) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

# Create target vectors

sex_ca2019_w <- c(0.511, 0.489)
age_ca2019_w <- c(0.027, 0.075, 0.083, 0.086, 0.086, 0.082, 0.078, 0.081, 
                  0.091, 0.087, 0.075, 0.062, 0.042, 0.026, 0.021)
education_ca2019_w  <- c(0.132, 0.274, 0.319, 0.207, 0.068)
income_ca2019_w  <- c(0.074, 0.168, 0.186, 0.114, 0.180, 0.137, 0.141)
province_ca2019_w  <- c(0.014, 0.004, 0.027, 0.021, 0.228, 0.388, 
                        0.035, 0.029, 0.111, 0.139, 0.003)

# Combine the individual vectors into a list

targets.ca2019 <- list(sex_ca2019_w, age_ca2019_w, education_ca2019_w, province_ca2019_w, income_ca2019_w)
names(targets.ca2019) <- c("sex_ca2019_w", "age_ca2019_w", "education_ca2019_w", "province_ca2019_w", "income_ca2019_w")

################################################################################
## SURVEY DESIGN: QUEBEC 2022 (CENSUS 2016)
################################################################################

# Create survey design object using 2016 Census PUM and weights

svy.pum2016.qc2022 <- svydesign(ids=~1, data=pum2016.qc2022, weights=pum2016.qc2022$weight) 

# Create targets

sex_qc2022_w <- svytable(~sex_qc2022_t, design=svy.pum2016.qc2022) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

age_qc2022_w <- svytable(~age_qc2022_t, design=svy.pum2016.qc2022) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

education_qc2022_w <- svytable(~education_qc2022_t, design=svy.pum2016.qc2022) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

income_qc2022_w <- svytable(~income_qc2022_t, design=svy.pum2016.qc2022) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

# Create target vectors

sex_qc2022_w <- c(0.51, 0.49)
age_qc2022_w <- c(0.028, 0.078, 0.077, 0.081, 0.086, 0.079, 0.081, 0.098, 
                  0.099, 0.086, 0.075, 0.056, 0.037, 0.023, 0.017)
education_qc2022_w  <- c(0.176, 0.217, 0.394, 0.161, 0.052)
income_qc2022_w <- c(0.141, 0.258, 0.220, 0.110, 0.140, 0.075, 0.056)

# Combine the individual vectors into a list

targets.qc2022 <- list(sex_qc2022_w, age_qc2022_w, education_qc2022_w, income_qc2022_w)
names(targets.qc2022) <- c("sex_qc2022_w", "age_qc2022_w", "education_qc2022_w", "income_qc2022_w")

################################################################################
## SURVEY DESIGN: QUEBEC 2022 (CENSUS 2021)
################################################################################

# Create survey design object using 2016 Census PUM and weights

svy.pum2021.qc2022 <- svydesign(ids=~1, data=pum2021.qc2022, weights=pum2021.qc2022$weight) 

# Create targets

sex_qc2022_w <- svytable(~sex_qc2022_t, design=svy.pum2021.qc2022) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

age_qc2022_w <- svytable(~age_qc2022_t, design=svy.pum2021.qc2022) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

education_qc2022_w <- svytable(~education_qc2022_t, design=svy.pum2021.qc2022) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

income_qc2022_w <- svytable(~income_qc2022_t, design=svy.pum2021.qc2022) %>%
  prop.table() %>%
  round(digits=3) %>%
  as.numeric()

# Create target vectors

sex_qc2022_w <- c(0.507, 0.493)
age_qc2022_w <- c(0.025, 0.068, 0.079, 0.080, 0.084, 0.086, 0.077, 0.077, 0.093, 0.092, 0.080, 0.066, 0.047, 0.027, 0.020)
education_qc2022_w  <- c(0.157, 0.216, 0.383, 0.180, 0.063)
income_qc2022_w <- c(0.092, 0.209, 0.204, 0.118, 0.170, 0.112, 0.094)

# Combine the individual vectors into a list

targets.qc2022 <- list(sex_qc2022_w, age_qc2022_w, education_qc2022_w, income_qc2022_w)
names(targets.qc2022) <- c("sex_qc2022_w", "age_qc2022_w", "education_qc2022_w", "income_qc2022_w")

################################################################################
## CANADA 2011
################################################################################ 

# Load data

merge <- read_dta("C:/Users/pmongrain/Desktop/main/article_5/submission/merge.dta")

ca2011 <- subset(merge, merge$election == "ca2011") #restrict data to adults

ca2011 <- as.data.frame(ca2011)
class(ca2011)

# Remove cases with NAs on weighting variables

ca2011 <- subset(ca2011, !is.na(sex_ca2011_w))
ca2011$sex_ca2011_w <- as.numeric(ca2011$sex_ca2011_w)

ca2011 <- subset(ca2011, !is.na(age_ca2011_w))
ca2011$age_ca2011_w <- as.numeric(ca2011$age_ca2011_w)

ca2011 <- subset(ca2011, !is.na(education_ca2011_w))
ca2011$education_ca2011_w <- as.numeric(ca2011$education_ca2011_w)

ca2011 <- subset(ca2011, !is.na(income_ca2011_w))
ca2011$income_ca2011_w <- as.numeric(ca2011$income_ca2011_w)

ca2011 <- subset(ca2011, !is.na(province_ca2011_w))
ca2011$province_ca2011_w <- as.numeric(ca2011$province_ca2011_w)

anesrakefinder(targets.ca2011, ca2011, choosemethod = "total")

# Create the weights

weights.ca2011 <- anesrake(targets.ca2011, ca2011, 
                  caseid = ca2011$responseid, cap = 5, type = "nolim")

# Store the weights as a variable

ca2011$ca2011_svy_weight  <- unlist(weights.ca2011[1])

ca2011.exit <- subset(ca2011, ca2011$type == "exit")
ca2011.invitation <- subset(ca2011, ca2011$type == "invitation")

write.dta(ca2011.exit[,c("election","responseid","ca2011_svy_weight")], 
          "C:/Users/pmongrain/Desktop/main/article_5/submission/ca2011_exit_weight.dta")

write.dta(ca2011.invitation[,c("election","responseid","ca2011_svy_weight")], 
          "C:/Users/pmongrain/Desktop/main/article_5/submission/ca2011_invitation_weight.dta")

# Create a svy object using the new weights

svy.w.ca2011 <- svydesign(ids = ~1, data = ca2011, weights = ca2011$ca2011_svy_weight)

# Unweighted proportion table

prop.table(table(ca2011$sex_ca2011_w))

# Weighted proportion table

prop.table(svytable(~sex_ca2011_w, design = svy.w.ca2011))

################################################################################
## ONTARIO 2011
################################################################################ 

# Load data

on2011 <- subset(merge, merge$election == "on2011") #restrict data to adults

on2011 <- as.data.frame(on2011)
class(on2011)

# Remove cases with NAs on weighting variables

on2011 <- subset(on2011, !is.na(sex_on2011_w))
on2011$sex_on2011_w <- as.numeric(on2011$sex_on2011_w)

on2011 <- subset(on2011, !is.na(age_on2011_w))
on2011$age_on2011_w <- as.numeric(on2011$age_on2011_w)

on2011 <- subset(on2011, !is.na(education_on2011_w))
on2011$education_on2011_w <- as.numeric(on2011$education_on2011_w)

on2011 <- subset(on2011, !is.na(income_on2011_w))
on2011$income_on2011_w <- as.numeric(on2011$income_on2011_w)

anesrakefinder(targets.on2011, on2011, choosemethod = "total")

# Create the weights

weights.on2011 <- anesrake(targets.on2011, on2011, 
                           caseid = on2011$responseid, cap = 5, type = "nolim")

# Store the weights as a variable

on2011$on2011_svy_weight  <- unlist(weights.on2011[1])

on2011.exit <- subset(on2011, on2011$type == "exit")
on2011.invitation <- subset(on2011, on2011$type == "invitation")

write.dta(on2011.exit[,c("election","responseid","on2011_svy_weight")], 
          "C:/Users/pmongrain/Desktop/main/article_5/submission/on2011_exit_weight.dta")

write.dta(on2011.invitation[,c("election","responseid","on2011_svy_weight")], 
          "C:/Users/pmongrain/Desktop/main/article_5/submission/on2011_invitation_weight.dta")

# Create a svy object using the new weights

svy.w.on2011 <- svydesign(ids = ~1, data = on2011, weights = on2011$on2011_svy_weight)

# Unweighted proportion table

prop.table(table(on2011$sex_on2011_w))

# Weighted proportion table

prop.table(svytable(~sex_on2011_w, design = svy.w.on2011))

################################################################################
## ONTARIO 2014
################################################################################ 

# Load data

on2014 <- subset(merge, merge$election == "on2014") #restrict data to adults

on2014 <- as.data.frame(on2014)
class(on2014)

# Remove cases with NAs on weighting variables

on2014 <- subset(on2014, !is.na(sex_on2014_w))
on2014$sex_on2014_w <- as.numeric(on2014$sex_on2014_w)

on2014 <- subset(on2014, !is.na(age_on2014_w))
on2014$age_on2014_w <- as.numeric(on2014$age_on2014_w)

on2014 <- subset(on2014, !is.na(education_on2014_w))
on2014$education_on2014_w <- as.numeric(on2014$education_on2014_w)

on2014 <- subset(on2014, !is.na(income_on2014_w))
on2014$income_on2014_w <- as.numeric(on2014$income_on2014_w)

anesrakefinder(targets.on2014, on2014, choosemethod = "total")

# Create the weights

weights.on2014 <- anesrake(targets.on2014, on2014, 
                           caseid = on2014$responseid, cap = 5, type = "nolim")

# Store the weights as a variable

on2014$on2014_svy_weight  <- unlist(weights.on2014[1])

write.dta(on2014[,c("election","responseid","on2014_svy_weight")], 
          "C:/Users/pmongrain/Desktop/main/article_5/submission/on2014_weight.dta")

# Create a svy object using the new weights

svy.w.on2014 <- svydesign(ids = ~1, data = on2014, weights = on2014$on2014_svy_weight)

# Unweighted proportion table

prop.table(table(on2014$sex_on2014_w))

# Weighted proportion table

prop.table(svytable(~sex_on2014_w, design = svy.w.on2014))

################################################################################
## CANADA 2015
################################################################################ 

# Load data

ca2015 <- subset(merge, merge$election == "ca2015") #restrict data to adults

ca2015 <- as.data.frame(ca2015)
class(ca2015)

# Remove cases with NAs on weighting variables

ca2015 <- subset(ca2015, !is.na(sex_ca2015_w))
ca2015$sex_ca2015_w <- as.numeric(ca2015$sex_ca2015_w)

ca2015 <- subset(ca2015, !is.na(age_ca2015_w))
ca2015$age_ca2015_w <- as.numeric(ca2015$age_ca2015_w)

ca2015 <- subset(ca2015, !is.na(education_ca2015_w))
ca2015$education_ca2015_w <- as.numeric(ca2015$education_ca2015_w)

ca2015 <- subset(ca2015, !is.na(income_ca2015_w))
ca2015$income_ca2015_w <- as.numeric(ca2015$income_ca2015_w)

ca2015 <- subset(ca2015, !is.na(province_ca2015_w))
ca2015$province_ca2015_w <- as.numeric(ca2015$province_ca2015_w)

anesrakefinder(targets.ca2015, ca2015, choosemethod = "total")

# Create the weights

weights.ca2015 <- anesrake(targets.ca2015, ca2015, 
                           caseid = ca2015$responseid, cap = 5, type = "nolim")

# Store the weights as a variable

ca2015$ca2015_svy_weight  <- unlist(weights.ca2015[1])

ca2015.exit <- subset(ca2015, ca2015$type == "exit")
ca2015.lpp <- subset(ca2015, ca2015$type == "lpp")

write.dta(ca2015.exit[,c("election","responseid","ca2015_svy_weight")], 
          "C:/Users/pmongrain/Desktop/main/article_5/submission/ca2015_exit_weight.dta")

write.dta(ca2015.lpp[,c("election","responseid","ca2015_svy_weight")], 
          "C:/Users/pmongrain/Desktop/main/article_5/submission/ca2015_lpp_weight.dta")

# Create a svy object using the new weights

svy.w.ca2015 <- svydesign(ids = ~1, data = ca2015, weights = ca2015$ca2015_svy_weight)

# Unweighted proportion table

prop.table(table(ca2015$sex_ca2015_w))

# Weighted proportion table

prop.table(svytable(~sex_ca2015_w, design = svy.w.ca2015))

################################################################################
## CANADA 2019 (CENSUS 2021)
################################################################################ 

# Load data

ca2019 <- subset(merge, merge$election == "ca2019") #restrict data to adults

ca2019 <- as.data.frame(ca2019)
class(ca2019)

# Remove cases with NAs on weighting variables

ca2019 <- subset(ca2019, !is.na(sex_ca2019_w))
ca2019$sex_ca2019_w <- as.numeric(ca2019$sex_ca2019_w)

ca2019 <- subset(ca2019, !is.na(age_ca2019_w))
ca2019$age_ca2019_w <- as.numeric(ca2019$age_ca2019_w)

ca2019 <- subset(ca2019, !is.na(education_ca2019_w))
ca2019$education_ca2019_w <- as.numeric(ca2019$education_ca2019_w)

ca2019 <- subset(ca2019, !is.na(income_ca2019_w))
ca2019$income_ca2019_w <- as.numeric(ca2019$income_ca2019_w)

ca2019 <- subset(ca2019, !is.na(province_ca2019_w))
ca2019$province_ca2019_w <- as.numeric(ca2019$province_ca2019_w)

anesrakefinder(targets.ca2019, ca2019, choosemethod = "total")

# Create the weights

weights.ca2019 <- anesrake(targets.ca2019, ca2019, 
                           caseid = ca2019$responseid, cap = 5, type = "nolim")

# Store the weights as a variable

ca2019$ca2019_svy_weight  <- unlist(weights.ca2019[1])

ca2019.phone <- subset(ca2019, ca2019$type == "phone")
ca2019.web <- subset(ca2019, ca2019$type == "web")

write.dta(ca2019.phone[,c("election","responseid","ca2019_svy_weight")], 
          "C:/Users/pmongrain/Desktop/main/article_5/submission/ca2019_phone_weight.dta")

write.dta(ca2019.web[,c("election","responseid","ca2019_svy_weight")], 
          "C:/Users/pmongrain/Desktop/main/article_5/submission/ca2019_web_weight.dta")

# Create a svy object using the new weights

svy.w.ca2019 <- svydesign(ids = ~1, data = ca2019, weights = ca2019$ca2019_svy_weight)

# Unweighted proportion table

prop.table(table(ca2019$sex_ca2019_w))

# Weighted proportion table

prop.table(svytable(~sex_ca2019_w, design = svy.w.ca2019))

################################################################################
## QUEBEC 2022 (CENSUS 2021)
################################################################################ 

# Load data

qc2022 <- subset(merge, merge$election == "qc2022") #restrict data to adults

qc2022 <- as.data.frame(qc2022)
class(qc2022)

# Remove cases with NAs on weighting variables

qc2022 <- subset(qc2022, !is.na(sex_qc2022_w))
qc2022$sex_qc2022_w <- as.numeric(qc2022$sex_qc2022_w)

qc2022 <- subset(qc2022, !is.na(age_qc2022_w))
qc2022$age_qc2022_w <- as.numeric(qc2022$age_qc2022_w)

qc2022 <- subset(qc2022, !is.na(education_qc2022_w))
qc2022$education_qc2022_w <- as.numeric(qc2022$education_qc2022_w)

qc2022 <- subset(qc2022, !is.na(income_qc2022_w))
qc2022$income_qc2022_w <- as.numeric(qc2022$income_qc2022_w)

anesrakefinder(targets.qc2022, qc2022, choosemethod = "total")

# Create the weights

weights.qc2022 <- anesrake(targets.qc2022, qc2022, 
                           caseid = qc2022$responseid, cap = 5, type = "nolim")

# Store the weights as a variable

qc2022$qc2022_svy_weight  <- unlist(weights.qc2022[1])

write.dta(qc2022[,c("election","responseid","qc2022_svy_weight")], 
          "C:/Users/pmongrain/Desktop/main/article_5/submission/qc2022_weight.dta")

# Create a svy object using the new weights

svy.w.qc2022 <- svydesign(ids = ~1, data = qc2022, weights = qc2022$qc2022_svy_weight)

# Unweighted proportion table

prop.table(table(qc2022$sex_qc2022_w))

# Weighted proportion table

prop.table(svytable(~sex_qc2022_w, design = svy.w.qc2022))